# Project root (used to build all file paths below)
home <- here::here()

# Load libraries
# (duplicate library(RCurl) call removed)
library(tidyverse)
library(tidylog)
library(RCurl)
library(gllvm)
library(RColorBrewer)
library(ggrepel)
library(vegan)
library(patchwork)
library(devtools)
library(ggpubr)
library(janitor)
library(brms)
library(modelr)
library(tidybayes)
library(ggstats)
library(broom)
library(broom.mixed)
# Source map-plot
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")
#source(paste0(home, "R/functions/map-plot.R"))
# Source code for rotating ordination plot (to make it work in ggplot)
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/rotate.R")
#source(paste0(home, "R/functions/rotate.R"))

Diet (dis)-similarity
Load packages
Read data
d <- read_csv(paste0(home, "/data/clean/aggregated_stomach_data.csv"))Diet similarity using bar plots and ordination
Ordination using gllvm
d2 <- d %>%
dplyr::select(ends_with("_tot"))
d2$pred_id <- d$pred_id
d2 <- d2 %>%
left_join(d %>% dplyr::select(pred_length_cm, pred_weight_g, pred_id, species), by = "pred_id")left_join: added 3 columns (pred_length_cm, pred_weight_g, species)
> rows only in x 0
> rows only in y ( 0)
> matched rows 9,649
> =======
> rows total 9,649
# Calculate relative prey weight and average by size-class
# (each prey weight is scaled by predator body weight before averaging)
d3 <- d2 %>%
pivot_longer(ends_with("_tot")) %>%
# Relative prey weight: prey mass per gram of predator
mutate(value = value/pred_weight_g) %>%
filter(pred_length_cm >= 10 & pred_length_cm <= 50) %>% # These are the sizes we work with
#filter(value < quantile(value, probs = 0.995)) %>%
# mutate(value = ifelse(value > quantile(value, probs = 0.975),
# quantile(value, probs = 0.975),
# value)) %>%
# Bin predator length into 2 cm classes; cut_width() produces labels like
# "(10,12]", so strip the opening bracket and keep the lower bound
mutate(pred_length_cm = cut_width(pred_length_cm, 2),
pred_length_cm = str_remove(pred_length_cm, "[(]")) %>%
separate_wider_delim(pred_length_cm, delim = ",", names = c("pred_length_cm", "scrap")) %>%
# The lowest bin is labelled with a closed bracket ("[9,..."), so it needs
# its own fix before the numeric conversion
mutate(pred_length_cm = ifelse(pred_length_cm == "[9", "9", pred_length_cm),
pred_length_cm = as.numeric(pred_length_cm)) %>%
dplyr::select(-scrap) %>%
group_by(species, pred_length_cm, name) %>%
summarise(tot_prey_weight = mean(value)) %>%
ungroup() %>%
# Tidy prey names: e.g. "gadus_morhua_tot" -> "Gadus Morhua"
mutate(name = str_replace(name, "_", " "),
name = str_replace(name, "_", " "),
name = str_remove(name, " tot"),
name = str_to_title(name))pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 7,065 rows (5%), 137,670 rows remaining
summarise: now 585 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
d3 %>% filter(tot_prey_weight > 0) %>% arrange(tot_prey_weight)filter: removed 174 rows (30%), 411 rows remaining
# A tibble: 411 × 4
species pred_length_cm name tot_prey_weight
<chr> <dbl> <chr> <dbl>
1 Cod 43 Polychaeta 0.0000000895
2 Cod 35 Non Bio 0.000000214
3 Cod 47 Amphipoda 0.000000283
4 Cod 45 Polychaeta 0.000000292
5 Cod 41 Bivalvia 0.000000365
6 Flounder 33 Mysidae 0.000000366
7 Cod 43 Amphipoda 0.000000406
8 Cod 45 Bivalvia 0.000000930
9 Flounder 25 Clupea Harengus 0.00000102
10 Cod 41 Amphipoda 0.00000104
# ℹ 401 more rows
d_wide <- d3 %>% pivot_wider(names_from = name, values_from = tot_prey_weight)pivot_wider: reorganized (name, tot_prey_weight) into (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) [was 585x4, now 39x17]
d_mat <- d_wide %>%
dplyr::select(-species, -pred_length_cm) %>%
as.matrix()
nrow(d_mat)[1] 39
# Fit gllvm model with two latent variables and no covariates, meaning it becomes an ordination
set.seed(999)
fit <- gllvm(y = d_mat, family = "tweedie", num.lv = 2)Note that, the tweedie family is implemented using the extended variational approximation method.
fitCall:
gllvm(y = d_mat, family = "tweedie", num.lv = 2)
family:
[1] "tweedie"
method:
[1] "VA"
log-likelihood: 2249.647
Residual degrees of freedom: 526
AIC: -4381.294
AICc: -4367.808
BIC: -4123.369
# Let's do a ggplot-residual instead
res <- residuals(fit)
p <- NCOL(fit$y)
sppind <- 1:p
ds_res <- res$residuals[, sppind]
# Compare with built-in plot from gllvm: plot(fit, which = 2)
ds_res %>%
as.data.frame() %>%
pivot_longer(cols = everything()) %>%
ggplot(aes(sample = value)) +
geom_qq(size = 0.8) +
geom_qq_line() +
labs(y = "Sample quantiles",
x = "Theoretical quantiles")pivot_longer: reorganized (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) into (name, value) [was 39x15, now 585x2]
ggsave(paste0(home, "/figures/supp/qq_ordi_gllvm.pdf"), width = 11, height = 11, units = "cm")
# Plot with GGplot
# Now make the ordination plot. We want to use ggplot, so need to hack it a bit
# See this thread: https://github.com/JenniNiku/gllvm/discussions/116
# ordiplot(fit, biplot = TRUE)
# Extract site scores
LVs = as.data.frame(getLV(fit))
# Bind LV site scores to metadata
# LVs = cbind(LVs, sim.meta)
#
# ordiplot(fittw, biplot = TRUE)
#
# ordiplot(fittw, biplot = TRUE, symbols = TRUE, spp.colors = "white")
#
LVs2 <- rotate(fit)
# text?
labs <- LVs2$species %>%
as.data.frame() %>%
rownames_to_column(var = "prey")
dd <- LVs2$sites %>%
as.data.frame() %>%
bind_cols(d_wide %>% dplyr::select(species, pred_length_cm)) %>%
mutate(group = "Flounder",
group = ifelse(species == "Cod" & pred_length_cm <= 25, "Small cod", group),
group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group))
# Alternative palettes for the groups
pal <- (brewer.pal(n = 11, name = "RdYlBu")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 11, name = "RdYlGn")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 8, name = "Dark2")[c(8, 7, 6)])
#pal <- brewer.pal(n = 8, name = "Dark2")[c(2, 7, 6)]
# Ordination plot: site scores (dd) colored by predator group and sized by
# predator length, with prey loadings (labs) as repelled labels
ordi <- ggplot(dd, aes(x = V1, y = V2, color = factor(group, levels = c("Flounder", "Small cod", "Large cod")),
size = pred_length_cm)) +
geom_point(alpha = 0.6) +
# stat_ellipse(aes(V1, V2, color = group),
# inherit.aes = FALSE, linewidth = 1, alpha = 0.3) +
scale_radius(range = c(0.8, 4)) +
scale_color_manual(values = pal) +
scale_fill_manual(values = pal) +
labs(size = "Predator length [cm]", shape = "Species", fill = "Group", color = "",
x = "Latent variable 1", y = "Latent variable 2") +
geom_label_repel(data = labs, aes(V1, V2, label = prey),
color = "grey20", inherit.aes = FALSE, size = 2, alpha = 0.4,
box.padding = 0.25) +
theme_sleek() +
coord_cartesian(xlim = c(-3, 4), ylim = c(-3, 3)) +
guides(color = guide_legend(ncol = 4),
size = guide_legend(ncol = 4)) +
theme(legend.position = "bottom",
legend.box = "vertical",
legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))
ordiMake a plot of the ontogenetic development of diets
# Calculate relative prey weight and average by size-class
d3 <- d2 %>%
pivot_longer(ends_with("_tot")) %>%
mutate(value = value/pred_weight_g) %>%
group_by(species, pred_length_cm, name) %>%
summarise(tot_prey_weight = mean(value)) %>%
ungroup() %>%
mutate(name = str_replace(name, "_", " "),
name = str_replace(name, "_", " "),
name = str_remove(name, " tot"),
name = str_to_title(name)) %>%
filter(tot_prey_weight < quantile(tot_prey_weight, probs = 0.995))pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
summarise: now 1,545 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
filter: removed 8 rows (1%), 1,537 rows remaining
# Cap cod length at 65 cm so rare very large fish fall into the top bin
max_size_cod <- 65
cod_important_prey <- d3 %>%
filter(species == "Cod") %>%
mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod -1, pred_length_cm)) %>%
# 5 cm length bins
mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>%
group_by(name, predator_length_grp) %>%
summarise(prey_group_tot = sum(tot_prey_weight)) %>%
ungroup() %>%
# Proportion of each prey within a length bin
group_by(predator_length_grp) %>%
mutate(prop = prey_group_tot / sum(prey_group_tot)) %>%
ungroup() %>%
# Parse the bin's upper bound from labels like "(60,65]".
# substr(.., 5, 6) hits non-digits for the two shortest labels, "(0,5]"
# and "(5,10]", producing the "NAs introduced by coercion" warning seen in
# the output; those two cases are patched by the ifelse() calls below
mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size),
predator = "Cod")filter: removed 509 rows (33%), 1,028 rows remaining
summarise: now 195 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
# Cap flounder length at 40 cm so rare very large fish fall into the top bin
max_size_fle <- 40
fle_important_prey <- d3 %>%
filter(species == "Flounder") %>%
mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle-1, pred_length_cm)) %>%
# 5 cm length bins
mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>%
group_by(name, predator_length_grp) %>%
summarise(prey_group_tot = sum(tot_prey_weight)) %>%
ungroup() %>%
# Proportion of each prey within a length bin
group_by(predator_length_grp) %>%
mutate(prop = prey_group_tot / sum(prey_group_tot)) %>%
ungroup() %>%
# Same bin-label parsing as for cod above; the coercion warning is
# expected and the two short labels are patched by the ifelse() calls
mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size),
predator = "Flounder")filter: removed 1,028 rows (67%), 509 rows remaining
summarise: now 105 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
area_plot <- bind_rows(fle_important_prey, cod_important_prey) %>%
mutate(prop = replace_na(prop, 0))
n_cat <- nrow(area_plot %>% distinct(name))distinct: removed 285 rows (95%), 15 rows remaining
colourCount <- n_cat
getPalette <- colorRampPalette(brewer.pal(12, "Paired"))
pal2 <- getPalette(colourCount)
area_plot %>% distinct(name)distinct: removed 285 rows (95%), 15 rows remaining
# A tibble: 15 × 1
name
<chr>
1 Amphipoda
2 Bivalvia
3 Clupea Harengus
4 Clupeidae
5 Gadus Morhua
6 Gobiidae
7 Mysidae
8 Non Bio
9 Other
10 Other Crustacea
11 Other Pisces
12 Platichthys Flesus
13 Polychaeta
14 Saduria Entomon
15 Sprattus Sprattus
# Dataframes for geom_text with sample size
n_cod <- d2 %>%
filter(species == "Cod") %>%
mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod -1, pred_length_cm)) %>%
mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>%
group_by(predator_length_grp) %>%
summarise(n = length(unique(pred_id)))filter: removed 3,882 rows (40%), 5,767 rows remaining
summarise: now 13 rows and 2 columns, ungrouped
n_fle <- d2 %>%
filter(species == "Flounder") %>%
mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle-1, pred_length_cm)) %>%
mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>%
group_by(predator_length_grp) %>%
summarise(n = length(unique(pred_id)))filter: removed 5,767 rows (60%), 3,882 rows remaining
summarise: now 7 rows and 2 columns, ungrouped
n_dat <- bind_rows(n_cod %>% mutate(predator = "Cod"),
n_fle %>% mutate(predator = "Flounder")) %>%
mutate(max_size = as.numeric(substr(predator_length_grp, 5, 6)),
max_size = ifelse(predator_length_grp == "(0,5]", 5, max_size),
max_size = ifelse(predator_length_grp == "(5,10]", 10, max_size))Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
# Stacked bar plot of diet proportions by predator size bin, faceted by species
bar_diet <-
ggplot(data = area_plot, aes(x = max_size, y = prop, fill = name, color = name)) +
geom_col(width = 4.3) +
# NOTE(review): size-0 white labels at y = 1.08 presumably extend the
# panel so the visible labels at y = 1.04 are not clipped — confirm
geom_text(data = n_dat, aes(x = max_size, y = 1.08, label = n), inherit.aes = FALSE,
size = 0, color = "white") +
# Sample size (number of stomachs) above each bar
geom_text(data = n_dat, aes(x = max_size, y = 1.04, label = n), inherit.aes = FALSE,
size = 2) +
facet_wrap(~predator, scales = "free") +
scale_fill_manual(values = pal2, name = "") +
scale_color_manual(values = pal2, name = "") +
coord_cartesian(expand = 0) +
scale_x_continuous(breaks = seq(0, 100, 5)) +
labs(y = "Proportion", x = "Max. predator size in group [cm]") +
theme(legend.key.size = unit(0.2, 'cm'),
legend.position = "bottom",
legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))Combine plots
bar_diet / ordi + plot_annotation(tag_levels = "A") #+ plot_layout(heights = c(1, 1.6))ggsave(paste0(home, "/figures/ordi_diet.pdf"), width = 17, height = 21, units = "cm")Schoener’s overlap index
# Calculate relative prey weight and average by size-class
# Sample size (number of stomachs) and total prey weight per
# year x ICES rectangle x predator group
year_rect_sum <- d2 %>%
pivot_longer(ends_with("_tot")) %>%
# NOTE(review): bounds are exclusive (> 10, < 50) here, but the ordination
# section above uses inclusive bounds (>= 10, <= 50) — confirm intended
filter(pred_length_cm > 10 & pred_length_cm < 50) %>% # These are the sizes we work with
left_join(d %>% dplyr::select(year, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") %>%
# Split cod into small/large at 25 cm; all flounder form one group
mutate(group = "Flounder",
group = ifelse(species == "Cod" & pred_length_cm <= 25, "Small cod", group),
group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) %>%
group_by(year, ices_rect, group) %>%
summarise(n = length(unique(pred_id)),
tot_prey = sum(value)) %>%
ungroup() %>%
# Smallest group-level sample size within each year x rectangle cell
group_by(year, ices_rect) %>%
mutate(min_stom = min(n)) %>%
ungroup() %>%
mutate(id = paste(year, ices_rect, group, sep = "_")) %>%
dplyr::select(-year, -ices_rect, -group) %>%
filter(n > 3)pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 8,475 rows (6%), 136,260 rows remaining
left_join: added 5 columns (year, subdiv, ices_rect, X, Y)
> rows only in x 0
> rows only in y ( 565)
> matched rows 136,260
> =========
> rows total 136,260
summarise: now 280 rows and 5 columns, 2 group variables remaining (year, ices_rect)
ungroup: no grouping variables
ungroup: no grouping variables
filter: removed 34 rows (12%), 246 rows remaining
# year_rect_sum %>% arrange(n) %>% as.data.frame()
# year_rect_sum %>% arrange(min_stom) %>% as.data.frame()
# summary(year_rect_sum$n)
# Diet proportions per year x rectangle x predator group x prey, restricted
# to the combinations retained in year_rect_sum (i.e. n > 3 stomachs)
diet_prop <- d2 %>%
pivot_longer(ends_with("_tot")) %>%
# NOTE(review): exclusive bounds (> 10, < 50) here vs inclusive bounds in
# the ordination section — confirm intended
filter(pred_length_cm > 10 & pred_length_cm < 50) %>% # These are the sizes we work with
left_join(d %>% dplyr::select(year, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") %>%
# Same grouping as in year_rect_sum: flounder vs small/large cod at 25 cm
mutate(group = "Flounder",
group = ifelse(species == "Cod" & pred_length_cm <= 25, "Small cod", group),
group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) %>%
group_by(year, ices_rect, group, name) %>%
summarise(tot_prey_by_grp = sum(value)) %>%
ungroup() %>%
mutate(id = paste(year, ices_rect, group, sep = "_")) %>%
filter(id %in% unique(year_rect_sum$id)) %>%
left_join(year_rect_sum, by = "id") %>%
mutate(prop_prey = tot_prey_by_grp / tot_prey)pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 8,475 rows (6%), 136,260 rows remaining
left_join: added 5 columns (year, subdiv, ices_rect, X, Y)
> rows only in x 0
> rows only in y ( 565)
> matched rows 136,260
> =========
> rows total 136,260
summarise: now 4,200 rows and 5 columns, 3 group variables remaining (year, ices_rect, group)
ungroup: no grouping variables
filter: removed 510 rows (12%), 3,690 rows remaining
left_join: added 3 columns (n, tot_prey, min_stom)
> rows only in x 0
> rows only in y ( 0)
> matched rows 3,690
> =======
> rows total 3,690
# Calculate overlap by year and ices rect and species group. Make the proportions wide!
#unique(diet_prop$group)
diet_prop_wide <- diet_prop %>%
dplyr::select(-tot_prey_by_grp, -tot_prey, -id, -n) %>%
pivot_wider(names_from = group, values_from = prop_prey) %>%
mutate(abs_f_sc = abs(Flounder - `Small cod`),
abs_f_lc = abs(Flounder - `Large cod`),
abs_lc_sc = abs(`Large cod` - `Small cod`)) %>%
group_by(year, ices_rect) %>%
summarise(`Flounder\nSmall cod` = 1 - 0.5*sum(abs_f_sc),
`Flounder\nLarge cod` = 1 - 0.5*sum(abs_f_lc),
`Small cod\nLarge cod` = 1 - 0.5*sum(abs_lc_sc))pivot_wider: reorganized (group, prop_prey) into (Flounder, Large cod, Small cod) [was 3690x6, now 1500x7]
summarise: now 100 rows and 5 columns, one group variable remaining (year)
diet_prop_wide$latitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lat
diet_prop_wide$longitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lon
diet_prop_wide <- sdmTMB::add_utm_columns(diet_prop_wide)Warning: Multiple UTM zones detected.
Proceeding with the most common value.
You may wish to choose a different projection.
Proceeding with UTM zone 33N; CRS = 32633.
Visit https://epsg.io/32633 to verify.
ovr <- diet_prop_wide %>%
pivot_longer(c(`Flounder\nSmall cod`, `Flounder\nLarge cod`, `Small cod\nLarge cod`),
names_to = "overlap_group", values_to = "overlap") %>%
drop_na(overlap)pivot_longer: reorganized (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) into (overlap_group, overlap) [was 100x9, now 300x8]
drop_na (grouped): removed 94 rows (31%), 206 rows remaining
ovr# A tibble: 206 × 8
# Groups: year [8]
year ices_rect latitude longitude X Y overlap_group overlap
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
1 2015 40G4 55.8 14.5 469. 6178. "Flounder\nSmall cod" 0.0721
2 2015 40G4 55.8 14.5 469. 6178. "Flounder\nLarge cod" 0.0210
3 2015 40G4 55.8 14.5 469. 6178. "Small cod\nLarge cod" 0.0346
4 2015 40G6 55.8 16.5 594. 6179. "Flounder\nSmall cod" 0.00160
5 2015 40G6 55.8 16.5 594. 6179. "Flounder\nLarge cod" 0.00240
6 2015 40G6 55.8 16.5 594. 6179. "Small cod\nLarge cod" 0.347
7 2015 41G7 56.2 17.5 655. 6237. "Flounder\nSmall cod" 0.230
8 2015 41G7 56.2 17.5 655. 6237. "Flounder\nLarge cod" 0.0948
9 2015 41G7 56.2 17.5 655. 6237. "Small cod\nLarge cod" 0.222
10 2015 41G8 56.2 18.5 717. 6239. "Flounder\nSmall cod" 0.105
# ℹ 196 more rows
# Plot diet overlap
# plot_map +
# geom_sf(color = "gray80") +
# geom_point(data = ovr, aes(X*1000, Y*1000, color = overlap),
# size = 10) +
# viridis::scale_color_viridis() +
# geom_sf() +
# facet_wrap(~overlap_group, ncol = 2) +
# NULL
set.seed(99)
ps <- ggplot(ovr, aes(overlap_group, overlap, color = ices_rect)) +
geom_jitter(height = 0, width = 0.1, alpha = 0.3) +
geom_boxplot(aes(overlap_group, overlap),
inherit.aes = FALSE, fill = NA, width = 0.2, alpha = 0.2, size = 0.6) +
viridis::scale_color_viridis(discrete = TRUE, name = "ICES\nrectangle") +
geom_hline(yintercept = 0.6, linetype = 2, alpha = 0.6) +
labs(y = "Schoener's overlap index",
x = "") +
theme(legend.position = "bottom",
legend.key.size = unit(0.01, "cm"),
legend.text = element_text(size = 7),
legend.title = element_text(size = 8),
legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))
ps# summary(ovr$overlap)
#
ovr %>% filter(overlap == 0)filter (grouped): removed 202 rows (98%), 4 rows remaining
# A tibble: 4 × 8
# Groups: year [3]
year ices_rect latitude longitude X Y overlap_group overlap
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
1 2018 42G6 56.8 16.5 592. 6291. "Flounder\nLarge cod" 0
2 2021 43G6 57.2 16.5 591. 6346. "Flounder\nSmall cod" 0
3 2021 43G6 57.2 16.5 591. 6346. "Flounder\nLarge cod" 0
4 2022 42G6 56.8 16.5 592. 6291. "Flounder\nLarge cod" 0
How does the diet overlap relate to biomass density of the species?
Trawl data
## Read in trawl data. There's one file for cod and one for flounder
# They go up to 2020 Q1
# trawl_surveys_cod <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv", sep = ";")
# trawl_surveys_fle <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv", sep = ";")
trawl_surveys_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv"), delim = ";")Rows: 9199 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (10): Species, Index, Appr. status, Validity, Station name, ICES, Dist....
dbl (13): Year, Quarter, Month, Seqno, Haul, Subsam, Processing, Size, Subd...
num (7): Lat, Long, Bottom depth, Doorspread, Opening, Tot. no./hour, No./...
lgl (4): Sex, Headropedepth, DNA sampleID, NumberToSample
date (1): Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_fle <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv"), delim = ";")Rows: 7306 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (10): Species, Index, Appr. status, Validity, Station name, ICES, Dist....
dbl (13): Year, Quarter, Month, Seqno, Haul, Subsam, Processing, Size, Subd...
num (7): Lat, Long, Bottom depth, Doorspread, Opening, Tot. no./hour, No./...
lgl (4): Sex, Headropedepth, DNA sampleID, NumberToSample
date (1): Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine for the two species, filter and clean!
trawl_data <- rbind(trawl_surveys_cod, trawl_surveys_fle) %>%
clean_names() %>%
mutate(swept_area = as.numeric(gsub(",", "\\.", swept_area)),
kg_hour = as.numeric(gsub(",", "\\.", kg_hour)),
dist = as.numeric(gsub(",", "\\.", dist))) %>%
as.data.frame()
str(trawl_data)'data.frame': 16505 obs. of 35 variables:
$ species : chr "cod" "cod" "cod" "cod" ...
$ date : Date, format: "2015-02-26" "2015-02-26" ...
$ year : num 2015 2015 2015 2015 2015 ...
$ quarter : num 1 1 1 1 1 1 1 1 1 1 ...
$ month : num 2 2 2 2 2 2 2 2 2 2 ...
$ index : chr "OXBH.2015.2" "OXBH.2015.2" "OXBH.2015.2" "OXBH.2015.2" ...
$ appr_status : chr "Locked" "Locked" "Locked" "Locked" ...
$ seqno : num 10048 10048 10048 10048 10048 ...
$ haul : num 2 2 2 2 2 2 2 2 2 2 ...
$ validity : chr "V" "V" "V" "V" ...
$ station_name : chr "SLAGGENABBEN" "SLAGGENABBEN" "SLAGGENABBEN" "SLAGGENABBEN" ...
$ subsam : num 0 0 0 0 0 0 0 0 0 0 ...
$ processing : num 0 0 0 0 0 0 0 0 0 0 ...
$ size : num 9 9 9 9 9 9 9 9 9 9 ...
$ sex : logi NA NA NA NA NA NA ...
$ subdiv : num 25 25 25 25 25 25 25 25 25 25 ...
$ rect : num 3959 3959 3959 3959 3959 ...
$ ices : chr "39G4" "39G4" "39G4" "39G4" ...
$ lat : num 55289 55289 55289 55289 55289 ...
$ long : num 143628 143628 143628 143628 143628 ...
$ bottom_depth : num 603 603 603 603 603 603 603 603 603 603 ...
$ duration : num 30 30 30 30 30 30 30 30 30 30 ...
$ dist : num 1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 ...
$ doorspread : num 875 875 875 875 875 875 875 875 875 875 ...
$ opening : num 56 56 56 56 56 56 56 56 56 56 ...
$ headropedepth : logi NA NA NA NA NA NA ...
$ swept_area : num 0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 ...
$ kg_hour : num 1606 1606 1606 1606 1606 ...
$ tot_no_hour : num 57467 57467 57467 57467 57467 ...
$ lengthcl : num 200 210 220 230 240 250 260 270 280 290 ...
$ no_hour : num 552 368 736 184 1472 ...
$ length_measure_type: chr "Total length TL" "Total length TL" "Total length TL" "Total length TL" ...
$ dna_sample_id : logi NA NA NA NA NA NA ...
$ number_to_sample : logi NA NA NA NA NA NA ...
$ smhi_serial_no : num 1 1 1 1 1 1 1 1 1 1 ...
sort(unique(trawl_data$lat)) %>% head(20) [1] 5541 5542 5608 5613 5619 5629 5632 5634 5703 5706 5712 5717 5721 5722 5723
[16] 5728 5729 5743 5746 5750
# For these coordinates, we can use the function Fede provided:
# Convert positions stored as DDMM (degrees then minutes, e.g. 5541 = 55°41')
# into decimal degrees, preserving the sign. Values with fewer than 4 digits
# are zero-padded on the left before splitting; any digits beyond position 4
# are ignored (as in the original). Vectorized over x.
#
# Fixes: the original ended on an assignment, so the result was returned
# invisibly; also simplified redundant paste(substring(...)) wrappers.
format.position <- function(x){
  sign.x <- sign(x)
  x <- abs(x)
  # Zero-pad so positions 1-2 are always degrees and 3-4 minutes
  x <- ifelse(nchar(x) == 3, paste0("0", x), x)
  x <- ifelse(nchar(x) == 2, paste0("00", x), x)
  x <- ifelse(nchar(x) == 1, paste0("000", x), x)
  dec.x <- as.numeric(substring(x, 1, 2)) + as.numeric(substring(x, 3, 4)) / 60
  # Explicit final expression so the value is returned visibly
  sign.x * dec.x
}
# Apply function
trawl_data$lat <- format.position(trawl_data$lat)
trawl_data$lon <- format.position(trawl_data$long)
trawl_data <- sdmTMB::add_utm_columns(trawl_data, ll_names = c("lon", "lat"))Warning: Multiple UTM zones detected.
Proceeding with the most common value.
You may wish to choose a different projection.
Proceeding with UTM zone 33N; CRS = 32633.
Visit https://epsg.io/32633 to verify.
# SMHI serial no?
t <- trawl_data %>% drop_na(smhi_serial_no)drop_na: removed 2,390 rows (14%), 14,115 rows remaining
plot_map +
geom_point(data = trawl_data, aes(X*1000, Y*1000, color = factor(year)),
alpha = 0.5, size = 0.3) +
theme_sleek(base_size = 8)Warning: `guide_colourbar()` needs continuous scales.
Warning: Removed 75 rows containing missing values or values outside the scale range
(`geom_point()`).
Read and join the biological data
trawl_surveys_s_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) COD.csv"), delim = ";")Rows: 10381 Columns: 33
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (9): Appr. status, Sampling type, Validity, Station name, ICES, Specie...
dbl (22): Year, Quarter, Month, Trip No, Seqno, Haul, Subsam, Processing, S...
lgl (1): Maturity No (SMSF)
date (1): Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_s_flounder <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) FLE.csv"), delim = ";")Rows: 13946 Columns: 33
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr (9): Appr. status, Sampling type, Validity, Station name, ICES, Specie...
dbl (20): Year, Quarter, Month, Trip No, Seqno, Haul, Subsam, Processing, S...
lgl (3): Age quality, Age grading, Maturity No (SMSF)
date (1): Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_s_cod$Species <- "Cod"
trawl_surveys_s_flounder$Species <- "Flounder"
bio_data <- bind_rows(trawl_surveys_s_cod, trawl_surveys_s_flounder) %>%
clean_names()
# Join the trawl, bio and stomach data. First create a unique ID.
# In earlier versions I had a column called otolith number (fish ID!), which was really fish id, but it isn't here anymore.
# Add a new species code in the trawl data that matches the stomach data
trawl_data <- trawl_data %>% mutate(predator_code = ifelse(species == "cod", "COD", "FLE"))
bio_data <- bio_data %>% mutate(predator_code = ifelse(species == "Cod", "COD", "FLE"))
unique(is.na(trawl_data[, c("year", "quarter", "haul")])) year quarter haul
[1,] FALSE FALSE FALSE
# Go ahead
trawl_data$haul_id <- paste(trawl_data$year,
trawl_data$quarter,
trawl_data$haul,
sep = "_")
# Should be a unique ID per length and predator code
trawl_data %>%
group_by(haul_id, lengthcl, predator_code) %>%
mutate(n = n()) %>%
ungroup() %>%
distinct(n)ungroup: no grouping variables
distinct: removed 16,504 rows (>99%), one row remaining
# A tibble: 1 × 1
n
<int>
1 1
# Now we want the cpue of a species-size combination as a column, then make it distinct per haul
trawl_data_unique_haul <- trawl_data %>%
dplyr::select(-species, -lengthcl, -predator_code, -kg_hour, -tot_no_hour, -no_hour, -length_measure_type, -sex, -long) %>% # Remove any column that has anything to do with catch, because that info should come from the species dataframes below. I.e., after left_joining this and the catch data, any 0 zero catches should be NA in the joined data
distinct(haul_id, .keep_all = TRUE)distinct: removed 15,851 rows (96%), 654 rows remaining
# Biomass density of cod by size group per haul
trawl_data_cod <- trawl_data %>%
filter(!validity == "I") %>%
filter(predator_code == "COD") %>%
# Convert kg/hour catch rate to catch, then to density per swept area
mutate(kg = kg_hour * (duration / 60),
kg_km2 = kg / swept_area) %>% # make it biomass density
drop_na(kg_km2) %>%
# Length classes: small = 100-249, medium = 250-499 (lengthcl appears to
# be in mm — confirm); other lengths stay NA and are filtered out below
mutate(size_group = ifelse(lengthcl >= 100 & lengthcl < 250, "small", NA),
size_group = ifelse(lengthcl >= 250 & lengthcl < 500, "medium", size_group)) %>%
group_by(haul_id, size_group) %>%
summarise(kg_km2 = sum(kg_km2)) %>%
filter(size_group %in% c("small", "medium")) %>%
pivot_wider(names_from = size_group, values_from = kg_km2) %>%
rename(mcod_kg_km2 = medium,
scod_kg_km2 = small) %>%
# A missing size group within a haul means zero catch of that group
mutate(mcod_kg_km2 = replace_na(mcod_kg_km2, 0),
scod_kg_km2 = replace_na(scod_kg_km2, 0))filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 7,290 rows (44%), 9,183 rows remaining
drop_na: removed 169 rows (2%), 9,014 rows remaining
summarise: now 1,130 rows and 3 columns, one group variable remaining (haul_id)
filter (grouped): removed 253 rows (22%), 877 rows remaining
pivot_wider: reorganized (size_group, kg_km2) into (medium, small) [was 877x3, now 468x3]
rename: renamed 2 variables (mcod_kg_km2, scod_kg_km2)
# Ok, so because this is catches (no hauls with no fish... the only reason I have 0 catches is if one of the size-groups doesn't have a catch!)
# Check with a haul_id
trawl_data %>%
filter(predator_code == "COD" & haul_id == "2015_1_20") filter: removed 16,500 rows (>99%), 5 rows remaining
species date year quarter month index appr_status seqno haul
1 cod 2015-02-28 2015 1 2 OXBH.2015.20 Locked 10057 20
2 cod 2015-02-28 2015 1 2 OXBH.2015.20 Locked 10057 20
3 cod 2015-02-28 2015 1 2 OXBH.2015.20 Locked 10057 20
4 cod 2015-02-28 2015 1 2 OXBH.2015.20 Locked 10057 20
5 cod 2015-02-28 2015 1 2 OXBH.2015.20 Locked 10057 20
validity station_name subsam processing size sex subdiv rect ices
1 V 11 S HOBURG BANK 0 0 9 NA 26 4163 41G8
2 V 11 S HOBURG BANK 0 0 9 NA 26 4163 41G8
3 V 11 S HOBURG BANK 0 0 9 NA 26 4163 41G8
4 V 11 S HOBURG BANK 0 0 9 NA 26 4163 41G8
5 V 11 S HOBURG BANK 0 0 9 NA 26 4163 41G8
lat long bottom_depth duration dist doorspread opening headropedepth
1 56.38333 182938 377 30 1.5 849 6 NA
2 56.38333 182938 377 30 1.5 849 6 NA
3 56.38333 182938 377 30 1.5 849 6 NA
4 56.38333 182938 377 30 1.5 849 6 NA
5 56.38333 182938 377 30 1.5 849 6 NA
swept_area kg_hour tot_no_hour lengthcl no_hour length_measure_type
1 0.472 4.508 10 260 2 Total length TL
2 0.472 4.508 10 330 2 Total length TL
3 0.472 4.508 10 350 2 Total length TL
4 0.472 4.508 10 400 2 Total length TL
5 0.472 4.508 10 410 2 Total length TL
dna_sample_id number_to_sample smhi_serial_no lon X Y
1 NA NA 19 18.48333 715.0414 6254.191
2 NA NA 19 18.48333 715.0414 6254.191
3 NA NA 19 18.48333 715.0414 6254.191
4 NA NA 19 18.48333 715.0414 6254.191
5 NA NA 19 18.48333 715.0414 6254.191
predator_code haul_id
1 COD 2015_1_20
2 COD 2015_1_20
3 COD 2015_1_20
4 COD 2015_1_20
5 COD 2015_1_20
trawl_data_cod# A tibble: 468 × 3
# Groups: haul_id [468]
haul_id mcod_kg_km2 scod_kg_km2
<chr> <dbl> <dbl>
1 2015_1_10 19145. 6892.
2 2015_1_12 7867. 2503.
3 2015_1_14 12351. 6444.
4 2015_1_15 3899. 2481.
5 2015_1_17 3583. 1792.
6 2015_1_2 38232. 8311.
7 2015_1_20 23.9 0
8 2015_1_21 0.401 0
9 2015_1_22 203. 18.4
10 2015_1_25 30.3 10.1
# ℹ 458 more rows
# Now do the same for flounder and join with the cod data, then with haul data!
# Different code because we do not need to worry about size
# Per-haul biomass density (kg/km2) of benthic-sized flounder
# (length class >= 100 mm), excluding invalid hauls (validity == "I").
trawl_data_fle <- trawl_data %>%
filter(!validity == "I") %>%
filter(predator_code == "FLE") %>%
# Catch-per-hour -> catch per haul, then standardise by swept area
mutate(kg = kg_hour * (duration / 60),
kg_km2 = kg / swept_area) %>% # make it biomass density
#drop_na(kg_km2) %>%
# NOTE(review): drop_na() is commented out and sum() below has no na.rm = TRUE,
# so any NA kg_km2 (e.g. missing swept_area) makes the whole haul's
# fle_kg_km2 NA — confirm this is intended given the later NA -> 0 step.
mutate(size_group = ifelse(lengthcl >= 100, "benthic", "all")) %>%
group_by(haul_id, size_group) %>%
# summarise() drops only the last grouping level; result stays grouped by haul_id
summarise(fle_kg_km2 = sum(kg_km2)) %>%
filter(size_group == "benthic")filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 9,183 rows (56%), 7,290 rows remaining
summarise: now 642 rows and 3 columns, one group variable remaining (haul_id)
filter (grouped): removed 152 rows (24%), 490 rows remaining
# Add back summaries to dataframe of unique hauls
# Drop invalid hauls so the haul list matches the filtered catch summaries
trawl_data_unique_haul <- trawl_data_unique_haul %>% filter(!validity == "I")filter: removed 16 rows (2%), 638 rows remaining
# Hauls absent from the cod summary (170 of 638) get NA densities here;
# they are filled with 0 below
trawl_data_wide <- left_join(trawl_data_unique_haul, trawl_data_cod, by = "haul_id")left_join: added 2 columns (mcod_kg_km2, scod_kg_km2)
> rows only in x 170
> rows only in y ( 0)
> matched rows 468
> =====
> rows total 638
# Same for flounder: 148 hauls without benthic flounder get NA fle_kg_km2
trawl_data_wide <- left_join(trawl_data_wide, trawl_data_fle, by = "haul_id")left_join: added 2 columns (size_group, fle_kg_km2)
> rows only in x 148
> rows only in y ( 0)
> matched rows 490
> =====
> rows total 638
# Change the NA's to 0's...
# Hauls missing from a species' summary mean "no catch of that group",
# so treat their joined NA densities as zero biomass.
# NOTE(review): this also zeroes NAs that came from missing effort data in
# the flounder summary — confirm those hauls should count as zero catch.
trawl_data_wide <- trawl_data_wide %>%
mutate(scod_kg_km2 = replace_na(scod_kg_km2, 0),
mcod_kg_km2 = replace_na(mcod_kg_km2, 0),
fle_kg_km2 = replace_na(fle_kg_km2, 0))
# Now add in the same ID in the bio_data
unique(is.na(bio_data[, c("year", "quarter", "haul")])) year quarter haul
[1,] FALSE FALSE FALSE
# Go ahead
# Build the composite haul identifier "year_quarter_haul" so bio_data can
# be joined to trawl_data_wide on haul_id.
bio_data$haul_id <- paste0(bio_data$year, "_", bio_data$quarter, "_", bio_data$haul)
unique(bio_data$haul_id) %>% head(20) [1] "2015_1_94" "2022_4_249" "2022_4_278" "2021_1_85" "2022_1_53"
[6] "2018_4_9" "2016_4_43" "2017_1_73" "2017_4_2" "2017_4_4"
[11] "2017_4_6" "2019_4_90" "2022_1_82" "2016_1_55" "2022_1_99"
[16] "2022_4_246" "2022_4_254" "2022_4_279" "2021_1_80" "2022_1_57"
# Now join in trawl data into the bio data (and then into stomach data)
colnames(bio_data) [1] "date" "year" "quarter"
[4] "month" "trip_no" "appr_status"
[7] "sampling_type" "seqno" "haul"
[10] "validity" "station_name" "subsam"
[13] "processing" "size" "max_maturity"
[16] "subdiv" "rect" "ices"
[19] "species" "lengthcl" "fishno"
[22] "age" "age_quality" "age_grading"
[25] "weight" "sex" "maturity"
[28] "maturity_no_smsf" "stomach_sample_code" "specimen_note"
[31] "individual_no" "spec_tsn" "day_night"
[34] "predator_code" "haul_id"
colnames(trawl_data_wide) [1] "date" "year" "quarter" "month"
[5] "index" "appr_status" "seqno" "haul"
[9] "validity" "station_name" "subsam" "processing"
[13] "size" "subdiv" "rect" "ices"
[17] "lat" "bottom_depth" "duration" "dist"
[21] "doorspread" "opening" "headropedepth" "swept_area"
[25] "dna_sample_id" "number_to_sample" "smhi_serial_no" "lon"
[29] "X" "Y" "haul_id" "mcod_kg_km2"
[33] "scod_kg_km2" "size_group" "fle_kg_km2"
# Check first for overlapping columns, and if so, if one of the datasets has any NAs
common_cols <- intersect(colnames(bio_data), colnames(trawl_data_wide))
unique(is.na(trawl_data_wide[, common_cols])) date year quarter month appr_status seqno haul validity station_name
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
subsam processing size subdiv rect ices haul_id
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[2,] TRUE TRUE TRUE FALSE FALSE FALSE FALSE
unique(is.na(bio_data[, common_cols])) date year quarter month appr_status seqno haul validity station_name
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
subsam processing size subdiv rect ices haul_id
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Trawl data has some NA's in the common columns. Select only the important columns
colnames(trawl_data_wide)[!colnames(trawl_data_wide) %in% colnames(bio_data)] [1] "index" "lat" "bottom_depth" "duration"
[5] "dist" "doorspread" "opening" "headropedepth"
[9] "swept_area" "dna_sample_id" "number_to_sample" "smhi_serial_no"
[13] "lon" "X" "Y" "mcod_kg_km2"
[17] "scod_kg_km2" "size_group" "fle_kg_km2"
# Keep only haul_id + the haul-level variables we need, avoiding clashes
# (and NAs) from the shared columns in the join below
trawl_data_wide <- trawl_data_wide %>% dplyr::select(haul_id, fle_kg_km2, mcod_kg_km2, scod_kg_km2, lat, lon, bottom_depth, X, Y, smhi_serial_no)
# Many-to-one join: every fish row in bio_data gets its haul's densities and position
bio_data <- left_join(bio_data, trawl_data_wide, by = "haul_id")left_join: added 9 columns (fle_kg_km2, mcod_kg_km2, scod_kg_km2, lat, lon, …)
> rows only in x 0
> rows only in y ( 146)
> matched rows 24,327
> ========
> rows total 24,327
names(bio_data) [1] "date" "year" "quarter"
[4] "month" "trip_no" "appr_status"
[7] "sampling_type" "seqno" "haul"
[10] "validity" "station_name" "subsam"
[13] "processing" "size" "max_maturity"
[16] "subdiv" "rect" "ices"
[19] "species" "lengthcl" "fishno"
[22] "age" "age_quality" "age_grading"
[25] "weight" "sex" "maturity"
[28] "maturity_no_smsf" "stomach_sample_code" "specimen_note"
[31] "individual_no" "spec_tsn" "day_night"
[34] "predator_code" "haul_id" "fle_kg_km2"
[37] "mcod_kg_km2" "scod_kg_km2" "lat"
[40] "lon" "bottom_depth" "X"
[43] "Y" "smhi_serial_no"
Summarise haul data at the ICES-rectangle level to join into the diet data
# Now calculate spatial overlap in biomass density...
# Mean biomass density per year x ICES rectangle.
# NOTE(review): bio_data is fish-level, so these means weight each haul by
# its number of sampled fish rather than counting each haul once — confirm
# that is the intended weighting (otherwise average over unique hauls first).
year_rect_sum_biom <- bio_data %>%
dplyr::select(fle_kg_km2, mcod_kg_km2, scod_kg_km2, ices, year) %>%
rename(ices_rect = ices) %>% #,
group_by(year, ices_rect) %>%
# Result stays grouped by year (summarise drops only the last group level)
summarise(mean_fle = mean(fle_kg_km2),
mean_scod = mean(scod_kg_km2),
mean_mcod = mean(mcod_kg_km2)) rename: renamed one variable (ices_rect)
summarise: now 117 rows and 5 columns, one group variable remaining (year)
# Join into diet data
# One row per year x rectangle with all three pairwise overlaps; drop_na()
# removes rect-years missing any of the pairs, then biomass means are joined.
# NOTE(review): left_join() relies on the implicit keys (year, ices_rect) —
# safer to state by = join_by(year, ices_rect) explicitly.
ovr <- ovr %>%
pivot_wider(names_from = overlap_group, values_from = overlap) %>%
drop_na() %>%
left_join(year_rect_sum_biom)pivot_wider: reorganized (overlap_group, overlap) into (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) [was 206x8, now 82x9]
drop_na (grouped): removed 20 rows (24%), 62 rows remaining
Joining with `by = join_by(year, ices_rect)`
left_join: added 3 columns (mean_fle, mean_scod, mean_mcod)
> rows only in x 0
> rows only in y (55)
> matched rows 62
> ====
> rows total 62
Fit zero-inflated beta regressions using brms
#https://www.andrewheiss.com/blog/2021/11/08/beta-regression-guide/#zero-inflated-beta-regression-bayesian-style
# full model
# Reshape to long (one row per overlap pair per rect-year) and build the
# response and scaled biomass covariate for the zero-inflated beta model.
ovr <- ovr %>%
ungroup() %>%
clean_names() %>%
pivot_longer(c("flounder_small_cod", "flounder_large_cod", "small_cod_large_cod")) %>%
# Snap numerically-tiny overlaps to exact 0 so the zero-inflation part sees them
mutate(value = ifelse(value < 1e-15, 0, value),
# NOTE(review): this scales the SUM and then divides by 3, giving sd = 1/3;
# if a scaled MEAN was intended, the /3 should be inside scale() — confirm
mean_biom_sc = as.numeric(scale(mean_fle + mean_scod + mean_mcod) / 3)#,
)ungroup: no grouping variables
pivot_longer: reorganized (flounder_small_cod, flounder_large_cod, small_cod_large_cod) into (name, value) [was 62x12, now 186x11]
# Zero-inflated beta model. Cell-means coding (0 + name) gives each overlap
# pair its own intercept, and name:mean_biom_sc gives pair-specific biomass
# slopes. Precision (phi, log link) and the zero-inflation probability
# (zi, logit link) vary by pair only.
zib_model <- bf(
value ~ 0 + name*mean_biom_sc,
phi ~ 0 + name,
zi ~ 0 + name,
family = zero_inflated_beta()
)
# Fit with 4 chains x 4000 iterations (default half warmup -> 8000 draws).
# NOTE(review): no `prior` argument, so the b coefficients get brms' flat
# defaults (see prior_summary below), and no `seed` argument, so draws are
# not exactly reproducible across runs.
fit <- brm(
formula = zib_model,
data = ovr,
cores = 4,
chains = 4,
iter = 4000
)Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 15.0.0 (clang-1500.1.0.2.5)’
using SDK: ‘MacOSX14.2.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling
fit Family: zero_inflated_beta
Links: mu = logit; phi = log; zi = logit
Formula: value ~ 0 + name * mean_biom_sc
phi ~ 0 + name
zi ~ 0 + name
Data: ovr (Number of observations: 186)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat
nameflounder_large_cod -2.53 0.15 -2.82 -2.21 1.00
nameflounder_small_cod -1.68 0.13 -1.94 -1.41 1.00
namesmall_cod_large_cod -0.82 0.15 -1.11 -0.52 1.00
mean_biom_sc -0.28 0.38 -1.09 0.38 1.00
nameflounder_small_cod:mean_biom_sc 0.88 0.50 -0.07 1.87 1.00
namesmall_cod_large_cod:mean_biom_sc -0.37 0.63 -1.63 0.86 1.00
phi_nameflounder_large_cod 2.24 0.21 1.81 2.62 1.00
phi_nameflounder_small_cod 1.82 0.19 1.43 2.16 1.00
phi_namesmall_cod_large_cod 0.80 0.17 0.47 1.12 1.00
zi_nameflounder_large_cod -4.68 1.28 -7.81 -2.77 1.00
zi_nameflounder_small_cod -4.69 1.30 -7.78 -2.79 1.00
zi_namesmall_cod_large_cod -4.68 1.33 -7.82 -2.76 1.00
Bulk_ESS Tail_ESS
nameflounder_large_cod 5741 5093
nameflounder_small_cod 7314 5365
namesmall_cod_large_cod 7696 6077
mean_biom_sc 4840 4559
nameflounder_small_cod:mean_biom_sc 5135 4777
namesmall_cod_large_cod:mean_biom_sc 5643 4857
phi_nameflounder_large_cod 5312 5321
phi_nameflounder_small_cod 6836 5789
phi_namesmall_cod_large_cod 7397 5597
zi_nameflounder_large_cod 6500 3389
zi_nameflounder_small_cod 5165 2753
zi_namesmall_cod_large_cod 6506 3694
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# Extract the three zero-inflation intercepts (logit scale)
zi_intercept <- tidy(fit, effects = "fixed") %>%
filter(component == "zi") %>%
pull(estimate) Warning in tidy.brmsfit(fit, effects = "fixed"): some parameter names contain
underscores: term naming may be unreliable!
filter: removed 9 rows (75%), 3 rows remaining
# Transformed to a probability/proportion
# plogis() maps logit -> probability; x100 gives the percent zeros per pair
plogis(zi_intercept)*100 # percent b_zi_nameflounder_large_cod b_zi_nameflounder_small_cod
0.9154595 0.9084557
b_zi_namesmall_cod_large_cod
0.9207962
# Compare with data
# Empirical share of exact zeros in the response (sanity check of the
# modelled zero-inflation probabilities above)
ovr %>%
count(value == 0) %>%
mutate(prop = n / sum(n) *100)count: now 2 rows and 2 columns, ungrouped
# A tibble: 2 × 3
`value == 0` n prop
<lgl> <int> <dbl>
1 FALSE 183 98.4
2 TRUE 3 1.61
get_variables(fit) [1] "b_nameflounder_large_cod"
[2] "b_nameflounder_small_cod"
[3] "b_namesmall_cod_large_cod"
[4] "b_mean_biom_sc"
[5] "b_nameflounder_small_cod:mean_biom_sc"
[6] "b_namesmall_cod_large_cod:mean_biom_sc"
[7] "b_phi_nameflounder_large_cod"
[8] "b_phi_nameflounder_small_cod"
[9] "b_phi_namesmall_cod_large_cod"
[10] "b_zi_nameflounder_large_cod"
[11] "b_zi_nameflounder_small_cod"
[12] "b_zi_namesmall_cod_large_cod"
[13] "lprior"
[14] "lp__"
[15] "accept_stat__"
[16] "stepsize__"
[17] "treedepth__"
[18] "n_leapfrog__"
[19] "divergent__"
[20] "energy__"
plot(fit)# https://discourse.mc-stan.org/t/trying-to-understand-default-prior-for-the-hurdle-part/24337
priors <- prior_summary(fit)
priors prior class coef group resp dpar nlpar lb ub
(flat) b
(flat) b mean_biom_sc
(flat) b nameflounder_large_cod
(flat) b nameflounder_small_cod
(flat) b nameflounder_small_cod:mean_biom_sc
(flat) b namesmall_cod_large_cod
(flat) b namesmall_cod_large_cod:mean_biom_sc
(flat) b phi
(flat) b nameflounder_large_cod phi
(flat) b nameflounder_small_cod phi
(flat) b namesmall_cod_large_cod phi
(flat) b zi
(flat) b nameflounder_large_cod zi
(flat) b nameflounder_small_cod zi
(flat) b namesmall_cod_large_cod zi
source
default
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
default
(vectorized)
(vectorized)
(vectorized)
default
(vectorized)
(vectorized)
(vectorized)
stancode(fit)// generated with brms 2.20.4
functions {
/* zero-inflated beta log-PDF of a single response
* Args:
* y: the response value
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: zero-inflation probability
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_lpdf(real y, real mu, real phi, real zi) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
if (y == 0) {
return bernoulli_lpmf(1 | zi);
} else {
return bernoulli_lpmf(0 | zi) +
beta_lpdf(y | shape[1], shape[2]);
}
}
/* zero-inflated beta log-PDF of a single response
* logit parameterization of the zero-inflation part
* Args:
* y: the response value
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: linear predictor for zero-inflation part
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_logit_lpdf(real y, real mu, real phi, real zi) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
if (y == 0) {
return bernoulli_logit_lpmf(1 | zi);
} else {
return bernoulli_logit_lpmf(0 | zi) +
beta_lpdf(y | shape[1], shape[2]);
}
}
// zero-inflated beta log-CCDF and log-CDF functions
real zero_inflated_beta_lccdf(real y, real mu, real phi, real zi) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
return bernoulli_lpmf(0 | zi) + beta_lccdf(y | shape[1], shape[2]);
}
real zero_inflated_beta_lcdf(real y, real mu, real phi, real zi) {
return log1m_exp(zero_inflated_beta_lccdf(y | mu, phi, zi));
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> K_phi; // number of population-level effects
matrix[N, K_phi] X_phi; // population-level design matrix
int<lower=1> K_zi; // number of population-level effects
matrix[N, K_zi] X_zi; // population-level design matrix
int prior_only; // should the likelihood be ignored?
}
transformed data {
}
parameters {
vector[K] b; // regression coefficients
vector[K_phi] b_phi; // regression coefficients
vector[K_zi] b_zi; // regression coefficients
}
transformed parameters {
real lprior = 0; // prior contributions to the log posterior
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = rep_vector(0.0, N);
// initialize linear predictor term
vector[N] phi = rep_vector(0.0, N);
// initialize linear predictor term
vector[N] zi = rep_vector(0.0, N);
mu += X * b;
phi += X_phi * b_phi;
zi += X_zi * b_zi;
mu = inv_logit(mu);
phi = exp(phi);
for (n in 1:N) {
target += zero_inflated_beta_logit_lpdf(Y[n] | mu[n], phi[n], zi[n]);
}
}
// priors including constants
target += lprior;
}
generated quantities {
}
# Posterior predictive check: density overlay of observed vs simulated data.
# NOTE(review): pp_check() already sets a coordinate system, so this
# coord_cartesian() replaces it (hence the warning below) — likely removable.
pp_check(fit) +
coord_cartesian(expand = TRUE) +
labs(x = "Schoener's overlap index",
y = "Density") +
theme(legend.position.inside = c(0.8, 0.8)) +
guides(color = guide_legend(position = "inside"))Using 10 posterior draws for ppc type 'dens_overlay' by default.
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
# ggsave() with no plot argument saves the last plot displayed
ggsave(paste0(home, "/figures/supp/schoener_zib_pp_check.pdf"), width = 11, height = 11, units = "cm")
conditional_effects(fit)set.seed(123)
# Human-readable two-line labels for the three overlap pairs (used on plot axes);
# any other name falls through unchanged.
ovr <- ovr %>%
  mutate(name2 = case_when(
    name == "flounder_large_cod" ~ "Flounder\nLarge cod",
    name == "flounder_small_cod" ~ "Flounder\nSmall cod",
    name == "small_cod_large_cod" ~ "Small cod\nLarge cod",
    TRUE ~ name
  ))
# Panel A: posterior expected overlap per pair at average biomass
# (mean_biom_sc = 0), 66/95% intervals, with the raw data jittered behind.
p1 <- ovr %>%
data_grid(name = unique(name),
mean_biom_sc = 0) %>%
mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) %>%
add_epred_draws(fit) %>%
ggplot(aes(.epred, name2)) +
stat_pointinterval(.width = c(.66, .95)) +
# Presumably the conventional 0.6 "biologically significant overlap"
# threshold for Schoener's index — confirm
geom_vline(xintercept = 0.6, linetype = 2, alpha = 0.6) +
geom_jitter(data = ovr, aes(value, name2), height = 0.2, width = 0, alpha = 0.3, color = "grey50") +
# NOTE(review): here x = .epred (the overlap) and y = the pair label, yet
# labs() puts the overlap title on y — the axis labels look swapped;
# confirm against the rendered figure.
labs(y = "Schoener's overlap index", x = "") +
geom_stripped_rows() +
coord_cartesian(expand = 0, xlim = c(-0.03, 1)) +
theme(plot.margin = unit(c(0, 0.3, 0, 0), "cm")) +
NULL
# Diverging palette; pal[6] (a blue) is used for the "slope < 0" fill in p2
pal <- brewer.pal(name = "RdBu", n = 7)
# Check CI's
# Pair-specific slopes: flounder/large-cod is the reference level, so its
# slope is b_mean_biom_sc alone; the other two add their interaction term.
fit %>%
spread_draws(b_mean_biom_sc,
`b_nameflounder_small_cod:mean_biom_sc`,
`b_namesmall_cod_large_cod:mean_biom_sc`) %>%
mutate("Flounder\nLarge cod" = b_mean_biom_sc,
"Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
"Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
) %>%
# NOTE(review): probs = 0.845 is an unusual upper quantile — confirm it is
# deliberate (e.g. 0.975 would match a 95% CI).
summarise(mean = mean(`Small cod\nLarge cod`),
upr = quantile(`Small cod\nLarge cod`, probs = 0.845))summarise: now one row and 2 columns, ungrouped
# A tibble: 1 × 2
mean upr
<dbl> <dbl>
1 -0.653 -0.163
# Panel B: posterior distributions of the biomass-density slope per pair,
# filled by sign (blue where the draw is < 0).
p2 <- fit %>%
spread_draws(b_mean_biom_sc,
`b_nameflounder_small_cod:mean_biom_sc`,
`b_namesmall_cod_large_cod:mean_biom_sc`) %>%
# Reference pair's slope is b_mean_biom_sc; the others add their interaction
mutate("Flounder\nLarge cod" = b_mean_biom_sc,
"Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
"Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
) %>%
pivot_longer(c(`Flounder\nLarge cod`, `Flounder\nSmall cod`, `Small cod\nLarge cod`),
names_to = "Overlap group", values_to = "slope") %>%
ggplot(aes(slope, `Overlap group`, fill = after_stat(x < 0))) +
ggdist::stat_eye() +
labs(x = "", y = "Slope of biomass density\non the mean Schoener's diet overlap") +
geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
scale_fill_manual(values = c("gray80", alpha(pal[6], 0.8))) +
theme(legend.position.inside = c(0.15, 0.1)) +
guides(fill = guide_legend(position = "inside")) +
geom_stripped_rows() +
coord_cartesian(expand = 0) +
NULLpivot_longer: reorganized (Flounder
Large cod, Flounder
Small cod, Small cod
Large cod) into (Overlap group, slope) [was 8000x9, now 24000x8]
p2# Conditional
# Panel C: conditional predictions over the observed biomass range
# (95% ribbon per pair) with the raw data points on top.
p3 <- ovr %>%
data_grid(name = unique(name),
mean_biom_sc = seq_range(mean_biom_sc, n = 101)) %>%
mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) %>%
add_epred_draws(fit) %>%
# y = value is only evaluated by geom_point (which receives data = ovr);
# stat_lineribbon overrides y with the posterior expectation .epred
ggplot(aes(x = mean_biom_sc, y = value, fill = name2, color = name2)) +
stat_lineribbon(aes(y = .epred), .width = c(.95), alpha = 1/4) +
geom_point(data = ovr, alpha = 0.5) +
scale_fill_brewer(palette = "Pastel1") +
scale_color_brewer(palette = "Set1") +
theme(legend.position.inside = c(0.90, 0.84)) +
labs(y = "Schoener's overlap index", x = "Scaled cod and flounder mean biomass density",
fill = "", color = "") +
guides(fill = guide_legend(position = "inside"))
# Assemble the multi-panel figure: (A | B) on top with collected axes, C below;
# ggsave() (fused on the last line) saves the last displayed plot
pp <- (p1 + p2) + plot_layout(axes = "collect")
pp / p3 +
plot_annotation(tag_levels = "A")ggsave(paste0(home, "/figures/schoener_zib.pdf"), width = 17, height = 20, units = "cm")
# What's the actual predictions
# Posterior mean expected overlap per pair at average biomass (mean_biom_sc = 0).
# NOTE(review): `sum` shadows base::sum. Function calls still resolve to
# base::sum (R skips non-function bindings in call position), but rename
# (e.g. pred_means) to avoid confusion.
sum <- ovr %>%
data_grid(name = unique(name),
mean_biom_sc = 0) %>%
mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) %>%
add_epred_draws(fit) %>%
group_by(name) %>%
summarise(mean = mean(.epred))summarise: now 3 rows and 2 columns, ungrouped
sum# A tibble: 3 × 2
name mean
<chr> <dbl>
1 flounder_large_cod 0.0735
2 flounder_small_cod 0.156
3 small_cod_large_cod 0.302
# Overlay the posterior means on panel A, then stop knitting here
p1 + geom_vline(data = sum, aes(xintercept = mean))knitr::knit_exit()